/* 
**THIS program reads in 'bh-02a-masterCallSOD.dta', created in bh2020-sub-02a-mergeCallSOD.do.
This program splits this master dataset into two groups of location-based datasets.
(see master.do file for additional information on this step) 
*/

local out2020_dir <place your filepaths here>
cd "`out2020_dir'"

use bh-02a-masterCallSOD,clear

rename depsumbr 		brnchdep 	//Branch Deposits
rename rssdid 			ent			//Bank-Level id
rename rssd9348 		bhc 		//BHC-Level id 




	/*Note on rssd9348/bhc: 
	As discussed in bhreadme.docx, at no point are Bank Holding Company Regulatory Report Form (Y9C) data used. 
	Rather, we  only use the id of a given bank's Top Holder.
	In other words, "rssd9348/bhc" here is just a shorthand for: 
	"which type of institution-id are we using to organize a location's deposits"*/

	
*Make shorter variable names - Stata character maximum becomes relevant in the next do file, 04a.
*loan ratios
rename renpl_ratio 		renR
rename npl_ratio   		nplR

*asset ratios
rename npa_ratio_2   	npaR2	
rename renpa_ratio_2 	renaR2  



*keep relevant variables				
local ren_rats 	"renR  	renaR2"
local npl_rats 	"nplR  	npaR2"
local id_vars "dateq stalpbr msabr ent bhc brnchdep rcon9804 off_num"

keep 	`id_vars' `ren_rats' `npl_rats'
order 	`id_vars' `ren_rats' `npl_rats'


/*TOTAL DEPOSITS BY BANK / BHC ID
In this step, we create the denominator of the 'location-to-bank' deposit share, i.e., an entity's total deposits 
in a quarter, regardless of location. This is to get a sense of how important a location is too an institution.

Note: We do not use call report data on total deposits of the headquarter bank as a denominator (e.g., call report variable rcfd2200) for two reasons:
1) Even in Q2 (where both call report and SOD data exist), small discrepancies exist between the SOD and the Call Report total deposits due to differences in the timing 
of submissions.
2) It is desirable to have the branch locations be internally consistent with the sum of a bank's branch locations. 

Therefore, we create our "bank total deposits" here by summing a bank's total deposits across all branch locations before splitting the data by location.

*/

local ent_bhcs "ent bhc"
foreach ent_bhc of local ent_bhcs {
	bys dateq `ent_bhc':egen double	`ent_bhc'_2200	= total(brnchdep) 
	format `ent_bhc'_2200 %20.0gc	
}


compress, nocoalesce //reduces size of files where possible



*CREATE LOCATION-BASED DATASETS
*Note: Datasets contain branch level deposit data along with relevant entity-level data (*_2200, for denominator of 'location-to-bank' ratio)]


*MSA - Keep if location has an MSA code (msabr!=0) - msabr is relevant aggregation level

	preserve
	
		keep if msabr!=0
		drop stalpbr
		
		duplicates drop dateq msabr ent off_num,force
			
			*###########
			save bh-03a-MSA-Locations,replace
			*###########

	restore
	
	
/*nonMSA / RURAL - Keep if location doesn't have an MSA code (msabr==0):
	- these (rural) locations are then aggregated by state (stalpbr) to get our state-level measure of nMSA bank health*/
	
 preserve
	
		keep if msabr==0
		duplicates drop dateq stalpbr ent off_num,force
		drop msabr
		
			*###########
			save bh-03b-nMSA-Locations,replace
			*###########

	restore



	
*STATE - Don't drop anything - State measures of BH include both rural and MSA data (aggregation occurs at state level) 
	preserve
	
		duplicates drop dateq stalpbr ent off_num,force
		drop msabr
		
			*###########
			save bh-03c-ST-Locations,replace
			*###########

	restore


	
*=======================================================================
*REFIX: CREATES VERSIONS OF DATASETS THAT EXCLUDE 034 FORM FILERS
*=======================================================================
/*
	*Note: FFIEC 034 files don't report full renpl measures until 1985q2, and therefore must be excluded from the denominator of a location's 
	deposit shares and the renpl BH measures for the relevant quarters and locations.

	See bhreadme.docx and comments in master do file (bh2020-sub-00) for more details on this step.

	*Note: The following code repeats the steps from above but with the following sample restrictions: 
		1) Each location-based file will only contain data prior to 1985q2
		2) Each location-based file will exclude institutions that file the FFIEC 034 form 
*/


use bh-02a-masterCallSOD,clear

keep if dateq<=tq(1985q1)
drop if rcon9804==54


rename depsumbr 		brnchdep 	//Branch Deposits
rename rssdid 			ent			//Bank-Level id
rename rssd9348 		bhc 		//BHC-Level id 


*loan ratio
rename renpl_ratio 		renR

*asset ratio
rename renpa_ratio_2 	renaR2 


*keep relevant variables				
local ren_rats 	"renR renaR2"
local id_vars "dateq stalpbr msabr ent bhc brnchdep rcon9804 off_num"

keep 	`id_vars' `ren_rats' 
order 	`id_vars' `ren_rats' 

/*TOTAL DEPOSITS BY BANK / BHC ID */

local ent_bhcs "ent bhc"
foreach ent_bhc of local ent_bhcs {
	bys dateq `ent_bhc':egen double	`ent_bhc'_2200	= total(brnchdep) 
	format `ent_bhc'_2200 %20.0gc	
}


compress, nocoalesce 



*CREATE LOCATION-BASED DATASETS FOR 034 FILER FIX -- created the same way we did above for measures over the full time horizon.

*MSA 
	preserve
	
		keep if msabr!=0
		drop stalpbr
		
		duplicates drop dateq msabr ent off_num,force
		
			*###########
			save bh-03d-MSA-Locations_refix,replace
			*###########
		
	restore
	
	
*nonMSA / RURAL 
	preserve
	
		keep if msabr==0
		duplicates drop dateq stalpbr ent off_num,force
		drop msabr
		
			*###########
			save bh-03e-nMSA-Locations_refix,replace
			*###########

	restore


*STATE - 
	preserve
	
		duplicates drop dateq stalpbr ent off_num,force
		drop msabr
		
			*###########
			save bh-03f-ST-Locations_refix,replace
			*###########
	restore

























